# Integrated Values Survey (1981-2021) Stata file read by load_ivs().
IVS_PATH <- "../replication_regional_data/raw/world_values_survey/outIntegrated_values_surveys_1981-2021.dta"

# Restricted-access European Values Survey inputs.
# See README for instructions on how to access this data.
# The zip archive is extracted into EVS_EXDIR, after which the Stata file
# below is read.
EVS_RESTRICTED_ZIP <- "../replication_regional_data/raw/european_values_survey/ZA7504_EVS-Trend_sensitive_V2.zip"
EVS_EXDIR <- "../replication_regional_data/raw/european_values_survey"
# NOTE: the identifier is misspelled ("RESTIRCTED"); it is kept unchanged
# because the rest of this script refers to it by this exact name.
EVS_RESTIRCTED_PATH <- "../replication_regional_data/raw/european_values_survey/ZA7504_v2-0-0.dta"


#' Load the Integrated Values Survey (IVS) and prepare it for downstream use.
#'
#' Reads the Stata file, keeps a small set of survey variables under readable
#' names, adds a `country_cleaned` column in which a few sub-national codes
#' are collapsed into a two-letter code, and drops one respondent number that
#' is heavily duplicated.
#'
#' @param ivs_path the path to the IVS file produced by the stata script
#'   provided by the WVS foundation
#'
#' @returns tibble with the selected IVS columns plus `country_cleaned`
load_ivs <- function(ivs_path) {
  read_stata(ivs_path) %>%
    # Keep only the variables used downstream, renamed from their survey codes.
    select(
      study = S001,
      wave_wvs = S002,
      wave_evs = S002EVS,
      original_respondent_number = S006,
      unified_respondent_number = S007,
      trust_people_meet_first_time = G007_34_B,
      interview_date = S012,
      country_iso2 = S009
    ) %>%
    # Map the special-case codes; every other code passes through unchanged.
    mutate(
      country_cleaned = case_when(
        country_iso2 %in% c("GB-GBN", "GB-NIR") ~ "GB",
        country_iso2 == "RS-KM" ~ "XK",
        country_iso2 == "CY-TCC" ~ "CY",
        TRUE ~ country_iso2
      )
    ) %>%
    # this respondent from the WVS wave 3 has many duplicates which explode
    # on join
    filter(original_respondent_number != 858069901)
}


#' Load restricted-access data from the European Values Survey (EVS).
#'
#' This dataset is accessible through a licensing agreement with GESIS and
#' contains NUTS3 region information on each respondent. The zip archive is
#' extracted into `exdir`, the extracted Stata file is read, and the
#' missing-value codes found in the two NUTS3 columns are replaced with `NA`.
#'
#' @param evs_zip path to the zipped restricted data
#' @param exdir path to extract restricted data to
#' @param evs_stata_out path to the extracted Stata file
#'
#' @return processed EVS data with nuts3 region assignments
load_restricted_evs_data <- function(evs_zip, exdir, evs_stata_out) {
  # String codes that stand for "no region" in the raw NUTS3 columns.
  missing_region_codes <- c(".c", ".d", "-4")

  unzip(evs_zip, exdir = exdir)

  read_stata(evs_stata_out) %>%
    select(
      evs_wave = S002EVS,
      country_iso2 = S009,
      original_respondent_number_evs = S006,
      unified_respondent_number = S007,
      nuts3_region_2006 = x048c_n3,
      nuts3_region_2016 = X048J_N3
    ) %>%
    mutate(
      # replace() keeps each column's own type, so no typed NA constant is
      # needed; if_else() with a bare NA fails its type check on dplyr
      # versions before 1.1.0.
      nuts3_region_2006 = replace(
        nuts3_region_2006,
        nuts3_region_2006 %in% missing_region_codes,
        NA
      ),
      nuts3_region_2016 = replace(
        nuts3_region_2016,
        nuts3_region_2016 %in% missing_region_codes,
        NA
      )
    )
}


# Load both survey sources.
ivs <- load_ivs(IVS_PATH)
evs_restricted <- load_restricted_evs_data(
  EVS_RESTRICTED_ZIP,
  EVS_EXDIR,
  EVS_RESTIRCTED_PATH
)

# Attach the restricted EVS columns to every IVS row. Both tables carry a
# country_iso2 column, so after the join they appear as country_iso2.x (IVS)
# and country_iso2.y (EVS).
ivs_joined <- left_join(ivs, evs_restricted, by = "unified_respondent_number")

# Keep German EVS wave 5 respondents with a non-missing answer to the trust
# question, then derive the NUTS2 code (first four characters of the NUTS3
# code) and a 0/1 indicator that is 1 for answers 1 or 2.
# NOTE(review): only NA answers are excluded; if non-response is stored as a
# numeric code rather than NA, those rows are counted as 0 -- confirm against
# the codebook.
ivs_filtered <- ivs_joined %>%
  filter(
    country_iso2.y == "DE",
    evs_wave == 5,
    !is.na(trust_people_meet_first_time)
  ) %>%
  mutate(
    nuts2_region_2016 = substr(nuts3_region_2016, 1, 4),
    trust_people_binary = as.numeric(
      trust_people_meet_first_time %in% c(1, 2)
    )
  ) %>%
  select(nuts3_region_2016, nuts2_region_2016, trust_people_binary)

# Per-region respondent counts and mean of the trust indicator.
# percent_trust_strangers is a share between 0 and 1, not scaled to 0-100.
nuts3_agg <- ivs_filtered %>%
  group_by(nuts3_region_2016) %>%
  summarise(
    n_respondents = n(),
    percent_trust_strangers = mean(trust_people_binary)
  )

nuts2_agg <- ivs_filtered %>%
  group_by(nuts2_region_2016) %>%
  summarise(
    n_respondents = n(),
    percent_trust_strangers = mean(trust_people_binary)
  )

# Write both aggregates to the derived-data directory.
write_csv(
  nuts3_agg,
  "../replication_regional_data/derived/nuts3_evs_responses.csv"
)
write_csv(
  nuts2_agg,
  "../replication_regional_data/derived/nuts2_evs_responses.csv"
)
